import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pandas as pd

import plotly
import plotly.graph_objects as go
import plotly.express as px

from plotly.offline import plot, iplot, init_notebook_mode


# Load the Steam games table from 'steam.csv' (path relative to the working
# directory) into the DataFrame used by the rest of the analysis.
df = pd.read_csv(filepath_or_buffer='steam.csv')

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Preview the first rows of the dataset.
df.head()
##    appid                       name  ...             owners  price
## 0     10             Counter-Strike  ...  10000000-20000000   7.19
## 1     20      Team Fortress Classic  ...   5000000-10000000   3.99
## 2     30              Day of Defeat  ...   5000000-10000000   3.99
## 3     40         Deathmatch Classic  ...   5000000-10000000   3.99
## 4     50  Half-Life: Opposing Force  ...   5000000-10000000   3.99
## 
## [5 rows x 18 columns]
# Draw one random 'appid' value; no seed is set, so the result changes
# from run to run.
df['appid'].sample()
## 21998    850780
## Name: appid, dtype: int64
# Count missing values per column.
df.isnull().sum()
## appid               0
## name                0
## release_date        0
## english             0
## developer           0
## publisher           0
## platforms           0
## required_age        0
## categories          0
## genres              0
## steamspy_tags       0
## achievements        0
## positive_ratings    0
## negative_ratings    0
## average_playtime    0
## median_playtime     0
## owners              0
## price               0
## dtype: int64
# Draw another random 'appid' value (again unseeded).
df['appid'].sample()
## 24558    936020
## Name: appid, dtype: int64
# Remove the 'appid' column from df in place.
df.drop(columns=['appid'], inplace=True)
# Inspect the distinct values of the 'english' flag (shown twice in the report).
df['english'].unique()
## array([1, 0], dtype=int64)
df['english'].unique()
## array([1, 0], dtype=int64)
# Replace the 0/1 flag with readable labels.
map_dict = {1: 'English', 0: 'non-English'}

df['english'] = df.english.map(map_dict)
# Pie chart of how many games are labelled English vs. non-English.
# Count each label once and reuse the result for both labels and values.
english_counts = df['english'].value_counts()

fig = go.Figure(data=[go.Pie(labels=english_counts.index,
                             values=english_counts.values)])

# Show absolute counts on the slices and outline each slice in black.
fig.update_traces(textinfo='value', textfont_size=20,
                  marker=dict(colors=['salmon', 'lightblue'],
                              line=dict(color='#000000', width=2)))

# Axis titles are intentionally omitted: the figure contains only a pie
# trace, and 'number of songs' / 'artist' do not describe this dataset.
fig.update_layout(
    height=600, width=600, title_text='English and not English pie chart',
    title_x=0.5,
    font=dict(family="Courier New, monospace", size=18, color="black"),
    # Horizontal legend anchored below the plot, aligned to the right edge.
    legend=dict(orientation="h", yanchor="bottom", y=-0.2,
                xanchor="right", x=1),
)

fig.show()
import plotly.express as px

# Same English / non-English split, drawn with plotly express and labelled
# with percentages. Only the two aggregated counts are plotted, so the
# values/names arrays are passed directly instead of the full DataFrame.
english_counts = df['english'].value_counts()
fig = px.pie(
    values=english_counts.values,
    names=english_counts.index,
    title='Jogos com idioma ingles',
)
# fig.show() is intentionally not called here; the updated figure is the
# value of the last expression.
fig.update_traces(textinfo='percent+label')

Temos que 98,1% dos jogos possuem o idioma inglês e apenas 1,89% não possuem esse idioma.

Verificar a quantidade de proprietários.



# Frequency of each ownership bucket; tail() shows the last five entries of
# the value_counts() result.
owner_bucket_counts = df['owners'].value_counts()
owner_bucket_counts.tail(5)
## 5000000-10000000       46
## 10000000-20000000      21
## 20000000-50000000       3
## 50000000-100000000      2
## 100000000-200000000     1
## Name: owners, dtype: int64

Apenas 1 dos jogos possui mais de cem milhões de proprietários (jogadores).

# Select the games whose ownership bucket is one of the three ranges below
# (20 million owners and up).
top_owner_buckets = [
    '20000000-50000000',
    '50000000-100000000',
    '100000000-200000000',
]
top_6_owners = df[df['owners'].isin(top_owner_buckets)]

# 'owners' holds range strings such as '20000000-50000000'. Sorting those
# strings directly is lexicographic ('100000000-...' sorts before
# '20000000-...'), so order by the numeric lower bound instead, largest
# bucket first. Sorting the negated bounds ascending with mergesort (stable)
# keeps games of the same bucket in their original row order.
owners_lower_bound = (
    top_6_owners['owners'].str.split('-').str[0].astype('int64')
)
row_order = (-owners_lower_bound).sort_values(kind='mergesort').index
top_6_owners = top_6_owners.loc[row_order]

print(top_6_owners)

##                                    name  ...  price
## 22                               Dota 2  ...   0.00
## 25     Counter-Strike: Global Offensive  ...   0.00
## 12836     PLAYERUNKNOWN'S BATTLEGROUNDS  ...  26.99
## 19                      Team Fortress 2  ...   0.00
## 1634                           Warframe  ...   0.00
## 3362                           Unturned  ...   0.00
## 
## [6 rows x 17 columns]

Perceba que Dota 2 é o jogo mais adquirido ou jogado na Steam; um dos motivos pode ser o seu preço (grátis).

# Convert the 'release_date' column to pandas datetime values, then preview
# the first five converted entries.
release_dates = pd.to_datetime(df['release_date'])
df['release_date'] = release_dates
df['release_date'].head(5)
## 0   2000-11-01
## 1   1999-04-01
## 2   2003-05-01
## 3   2001-06-01
## 4   1999-11-01
## Name: release_date, dtype: datetime64[ns]

Vamos verificar a quantidade de jogos grátis.


# Count free games (price == 0); every other row is counted as not free.
# Summing a boolean mask avoids building two filtered copies of df.
is_free = df['price'] == 0
free = int(is_free.sum())
not_free = int(len(df) - free)

labels = ['free', 'not free']

# Only the two aggregated counts are plotted, so they are passed directly
# rather than together with the full DataFrame.
fig = px.pie(values=[free, not_free], names=labels,
             title='relação de jogos gratuitos')
fig.update_traces(textinfo='percent+label')

          

Apenas 9,46% dos jogos disponíveis são gratuitos.

Vamos ver quantos jogos na Steam oferecem jogabilidade multiplayer e quantos não oferecem esse recurso.

import plotly.express as px


def _mentions_multiplayer(category_text):
    """Return True when the text contains 'multi-player', ignoring case."""
    return 'multi-player' in category_text.lower()


# Flag each game according to whether its 'categories' text mentions
# multi-player support.
df['multiplayer'] = df['categories'].apply(_mentions_multiplayer)
# Display the frame without 'categories'. The result is not assigned, so df
# itself still contains that column.
df.drop(columns=['categories'])
##                              name release_date  ... price multiplayer
## 0                  Counter-Strike   2000-11-01  ...  7.19        True
## 1           Team Fortress Classic   1999-04-01  ...  3.99        True
## 2                   Day of Defeat   2003-05-01  ...  3.99        True
## 3              Deathmatch Classic   2001-06-01  ...  3.99        True
## 4       Half-Life: Opposing Force   1999-11-01  ...  3.99        True
## ...                           ...          ...  ...   ...         ...
## 27070             Room of Pandora   2019-04-24  ...  2.09       False
## 27071                   Cyber Gun   2019-04-23  ...  1.69       False
## 27072            Super Star Blast   2019-04-24  ...  3.99        True
## 27073  New Yankee 7: Deer Hunters   2019-04-17  ...  5.19       False
## 27074                   Rune Lord   2019-04-24  ...  5.19       False
## 
## [27075 rows x 17 columns]
# Number of games with and without multiplayer support.
multiplayer_counts = df['multiplayer'].value_counts()
valor = multiplayer_counts.values

# value_counts() orders its result by frequency, not by flag value, so the
# bar labels are derived from the result's own index instead of assuming
# that the non-multiplayer count always comes first.
label_by_flag = {False: 'No-multiplayer', True: 'Multiplayer'}
bar_labels = [label_by_flag[bool(flag)] for flag in multiplayer_counts.index]

# The title describes the chart; "Wide-Form Input" was a placeholder
# unrelated to this data.
fig = px.bar(x=bar_labels, y=valor,
             title="Multiplayer vs. non-multiplayer games")
fig.show()

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.